# Packages used throughout the analysis: tidyverse for data wrangling and
# ggplot2, here for project-relative paths, plotly and highcharter for the
# interactive charts.
library(tidyverse)
library(dplyr, warn.conflicts = FALSE)
library(here)
library(plotly)
library(highcharter)
library(stringr)
# Make the black-and-white theme the default for every ggplot2 plot below.
theme_set(theme_bw())

Data Overview

# Load the per-track dataset from the project's data folder. Columns that are
# not listed explicitly are parsed as doubles.
data <- read_csv(
  here::here("data/jackson.csv"),
  col_types = cols(
    .default = col_double(),
    album_uri = col_character(),
    album_name = col_character(),
    album_img = col_character(),
    album_release_date = col_character(),
    album_release_year = col_date(format = ""),
    album_popularity = col_integer(),
    track_name = col_character(),
    track_uri = col_character(),
    key = col_character(),
    mode = col_character(),
    time_signature = col_integer(),
    key_mode = col_character(),
    track_popularity = col_integer()
  )
) %>%
  # Rewrite every album name that matches the pattern to one canonical title.
  # NOTE(review): the parentheses in the pattern are a regex group, not
  # literal characters, so any name containing "1954" is rewritten -- confirm
  # that this is the intent.
  mutate(
    album_name = gsub(
      ".*(1954).*",
      "The Music of Brazil/Jackson do Pandeiro",
      album_name
    )
  )

# Print every column with its type and first values.
glimpse(data)
Observations: 500
Variables: 23
$ album_uri          <chr> "5T9tTjPIfjbUJGRJdYOOLl", "5T9tTjPIfjbUJGRJdYOOLl", "5T9tTjPIfjb...
$ album_name         <chr> "Jackson Do Pandeiro Volume 1: Tum, Tum, Tum!", "Jackson Do Pand...
$ album_img          <chr> "https://i.scdn.co/image/5dcc4a0cad740f1ee0774196d0a14f3693ef887...
$ album_release_date <chr> "1958-11-11", "1958-11-11", "1958-11-11", "1958-11-11", "1958-11...
$ album_release_year <date> 1958-11-11, 1958-11-11, 1958-11-11, 1958-11-11, 1958-11-11, 195...
$ album_popularity   <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ track_name         <chr> "Tum, Tum, Tum", "Pacífico Pacato", "Nortista Quatrocentão", "Se...
$ track_uri          <chr> "6cCYhV6fU68uzbjWPG9V7x", "6Gu7y9SgtVTGh8YGhDPtCe", "1hq7M7cJtvD...
$ danceability       <dbl> 0.501, 0.663, 0.550, 0.447, 0.544, 0.571, 0.495, 0.572, 0.500, 0...
$ energy             <dbl> 0.987, 0.962, 0.947, 0.969, 0.972, 0.926, 0.967, 0.986, 0.947, 0...
$ key                <chr> "A", "F", "D", "G", "E", "F", "E", "C", "F", "A#", "E", "F", "D#...
$ loudness           <dbl> 2.561, 1.137, 1.621, 2.743, 2.513, 2.414, 2.375, 2.597, 3.078, 3...
$ mode               <chr> "major", "major", "major", "major", "minor", "major", "minor", "...
$ speechiness        <dbl> 0.0429, 0.1810, 0.0469, 0.0549, 0.0502, 0.0344, 0.0576, 0.0367, ...
$ acousticness       <dbl> 0.718, 0.738, 0.666, 0.759, 0.787, 0.651, 0.712, 0.194, 0.286, 0...
$ instrumentalness   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ liveness           <dbl> 0.282, 0.200, 0.251, 0.333, 0.176, 0.342, 0.321, 0.301, 0.323, 0...
$ valence            <dbl> 0.963, 0.961, 0.923, 0.899, 0.783, 0.961, 0.755, 0.989, 0.957, 0...
$ tempo              <dbl> 101.676, 113.562, 116.125, 116.023, 112.863, 133.065, 117.822, 1...
$ duration_ms        <dbl> 158133, 139773, 163173, 143733, 151653, 157480, 158133, 154680, ...
$ time_signature     <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4...
$ key_mode           <chr> "A major", "F major", "D major", "G major", "E minor", "F major"...
$ track_popularity   <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0...

Danceability

# Normal Q-Q plot of danceability across all rows.
ggplot(data, aes(sample = danceability)) +
  stat_qq()

# Interactive highcharter chart of the same values.
hchart(
  data$danceability,
  color = "#B71C1C",
  name = "Dançabilidade"
)

Speechiness

# Normal Q-Q plot of speechiness across all rows.
ggplot(data, aes(sample = speechiness)) +
  stat_qq()

# Interactive highcharter chart of the same values.
hchart(
  data$speechiness,
  color = "#B71C1C",
  name = "Verbosidade"
)

Duration

# Derive the track length in seconds from the millisecond column.
data <- mutate(data, duration_s = duration_ms / 1000)

# Inspect only the newly created column.
glimpse(select(data, duration_s))
Observations: 500
Variables: 1
$ duration_s <dbl> 158.133, 139.773, 163.173, 143.733, 151.653, 157.480, 158.133, 154.680, ...
# Normal Q-Q plot of track duration in seconds.
ggplot(data, aes(sample = duration_s)) +
  stat_qq()

# Interactive highcharter chart of the same values.
hchart(
  data$duration_s,
  color = "#B71C1C",
  name = "Duração (s)"
)

Remaster

Último álbum: “1981: Isso é que é Forró!”

# Flag rows whose album was released after 1981 (the year of the last album
# named in the text above) as remasters/reissues.
# album_release_date is read as a character column, so this is a lexicographic
# comparison; it is only meaningful when both sides are ISO "YYYY-MM-DD"
# strings. The cutoff is therefore written as a valid ISO date (last day of
# 1981) rather than a day/month-swapped literal.
data <- data %>%
  mutate(remaster = album_release_date > "1981-12-31")

# Spot-check the flag on a random sample of 10 rows.
data %>%
  select(album_name, album_release_year, remaster) %>%
  sample_n(10)

# One row per album, with the logical flag turned into a readable label.
temp <- data %>%
  distinct(album_name, .keep_all = TRUE) %>%
  mutate(remaster = if_else(remaster, "remasterizado", "original"))

# Chart of how many albums fall under each label.
hchart(
  temp$remaster,
  colorByPoint = TRUE,
  name = "Álbum"
)

Mais discurso/diálogo (speechiness) tem efeito sobre a dançabilidade da música?

# Scatter plot of danceability against speechiness, one point per distinct
# track name, rendered interactively through plotly.
p <- ggplot(
  distinct(data, track_name, .keep_all = TRUE),
  aes(x = speechiness, y = danceability)
) +
  geom_point(alpha = 0.4)
ggplotly(p)

# 2-D density of the same two variables, drawn as filled contour polygons.
ggplot(
  distinct(data, track_name, .keep_all = TRUE),
  aes(speechiness, danceability)
) +
  stat_density2d(aes(fill = ..level..), geom = "polygon")

# The ten tracks with the highest speechiness (ties may add rows).
# Ranking is done over the whole table of distinct track names: grouping by
# track_name first would apply top_n() inside each single-track group and
# return practically every row.
data %>%
  distinct(track_name, .keep_all = TRUE) %>%
  top_n(10, speechiness) %>%
  arrange(desc(speechiness))

Como o passar dos anos afeta o tempo de duração da música?

# Count albums per release year, split by the remaster flag
# (one row per album name before counting).
temp <- data %>%
  distinct(album_name, .keep_all = TRUE) %>%
  group_by(album_release_year) %>%
  summarise(
    original_n = sum(!remaster),
    remaster_n = sum(remaster)
  )

# Column chart with one series per flag value and release year on the x axis.
highchart() %>%
  hc_xAxis(categories = temp$album_release_year) %>%
  hc_add_series(
    temp$original_n,
    type = "column",
    color = "#B71C1C",
    name = "Não remasterizado"
  ) %>%
  hc_add_series(
    temp$remaster_n,
    type = "column",
    name = "Remasterizado"
  ) %>%
  hc_title(text = "Número de álbuns por ano")
# Box plot of track duration (seconds) per album release date, coloured by the
# remaster flag; x labels are rotated so they do not overlap.
ggplot(
  data,
  aes(
    x = as.factor(album_release_year),
    y = duration_s,
    group = album_release_year,
    color = remaster
  )
) +
  geom_boxplot(position = "dodge", alpha = 0.6) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

Os álbuns relançados/remasterizados são mais populares?

# Lollipop chart: one stem and dot per album, ordered by album popularity and
# coloured by the remaster flag.

# Left margin passed to plotly's layout() so the album names on the y axis
# have room.
m <- list(
  l = 370)

p <- data %>%
  # Draw each album once instead of once per track.
  # NOTE(review): assumes album_popularity is constant within an album, so
  # keeping the first row per album loses nothing -- confirm.
  distinct(album_name, .keep_all = TRUE) %>%
  # Build the ordered factor once so both ends of each segment use the same
  # y ordering.
  mutate(album = reorder(album_name, album_popularity)) %>%
  ggplot(aes(x = album_popularity,
             y = album,
             color = remaster,
             group = remaster)) +
  geom_segment(aes(x = 0,
                   xend = album_popularity,
                   yend = album)) +
  geom_point() +
  theme(axis.title.y = element_blank())

# FALSE is spelled out: F is an ordinary variable that can be reassigned.
ggplotly(p) %>%
  layout(autosize = FALSE,
         margin = m)
LS0tCnRpdGxlOiAiRURBIFZJUyBjYW7Dp8O1ZXMiCnN1YnRpdGxlOiAnQW7DoWxpc2Ugc29icmUgSmFja3NvbiBkbyBQYW5kZWlybycKYXV0aG9yOiAiSm9zw6kgQmVuYXJkaSBkZSBTb3V6YSBOdW5lcyIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKICBodG1sX2RvY3VtZW50OgogICAgZGZfcHJpbnQ6IHBhZ2VkCiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKLS0tCgpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoZHBseXIsIHdhcm4uY29uZmxpY3RzID0gRkFMU0UpCmxpYnJhcnkoaGVyZSkKbGlicmFyeShwbG90bHkpCmxpYnJhcnkoaGlnaGNoYXJ0ZXIpCmxpYnJhcnkoc3RyaW5ncikKdGhlbWVfc2V0KHRoZW1lX2J3KCkpCmBgYAoKIyMgRGF0YSBPdmVydmlldwoKYGBge3J9CmRhdGEgPC0gcmVhZF9jc3YoaGVyZTo6aGVyZSgiZGF0YS9qYWNrc29uLmNzdiIpLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbF90eXBlcyA9IGNvbHMoCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgLmRlZmF1bHQgPSBjb2xfZG91YmxlKCksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYWxidW1fdXJpID0gY29sX2NoYXJhY3RlcigpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGFsYnVtX25hbWUgPSBjb2xfY2hhcmFjdGVyKCksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYWxidW1faW1nID0gY29sX2NoYXJhY3RlcigpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGFsYnVtX3JlbGVhc2VfZGF0ZSA9IGNvbF9jaGFyYWN0ZXIoKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBhbGJ1bV9yZWxlYXNlX3llYXIgPSBjb2xfZGF0ZShmb3JtYXQgPSAiIiksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYWxidW1fcG9wdWxhcml0eSA9IGNvbF9pbnRlZ2VyKCksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdHJhY2tfbmFtZSA9IGNvbF9jaGFyYWN0ZXIoKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB0cmFja191cmkgPSBjb2xfY2hhcmFjdGVyKCksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAga2V5ID0gY29sX2NoYXJhY3RlcigpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1vZGUgPSBjb2xfY2hhcmFjdGVyKCksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdGltZV9zaWduYXR1cmUgPSBjb2xfaW50ZWdlcigpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGtleV9tb2RlID0gY29sX2NoYXJhY3RlcigpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHRyYWNrX3BvcHVsYXJpdHkgPSBjb2xfaW50ZWdlcigpCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgKSkgJT4lCiAgICAgICAgIG11dGF0ZShhbGJ1bV9uYW1lID0gZ3N1YigiLiooMTk1NCkuKiIsCiAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIlRoZSBNdXNpYyBvZiBCcmF6aWwvSmFja3NvbiBkbyBQYW5kZWlybyIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBhbGJ1bV9uYW1lKSk7CmRhdGEgJT4lIAogICAgZ2xpbXBzZSgpCmBgYAoKIyMjIERhbmNlYWJpbGl0eQoKYGBge3J9CmRhdGEgJT4lIAogICAgZ2dwbG90KGFlcyhzYW1wbGU9ZGFuY2VhYmlsaXR5KSkgKyAKICAgICAgICBzdGF0X3FxKCkKYGBgCgpgYGB7cn0KaGNoYXJ0IChkYXRhJGRhbmNlYWJpbGl0eSwKICAgICAgICBjb2xvciA9ICIjQjcxQzFDIiwKICAgICAgICBuYW1lID0gIkRhbsOnYWJpbGlkYWRlIikKYGBgCgoKIyMjIFNwZWVjaGluZXMKCmBgYHtyfQpkYXRhICU+JSAKICAgIGdncGxvdChhZXMoc2FtcGxlPXNwZWVjaGluZXNzKSkgKyAKICAgICAgICBzdGF0X3FxKCkKYGBgCgpgYGB7cn0KaGNoYXJ0IChkYXRhJHNwZWVjaGluZXNzLAogICAgICAgIGNvbG9yID0gIiNCNzFDMUMiLAogICAgICAgIG5hbWUgPSAiVmVyYm9zaWRhZGUiKQpgYGAKCiMjIyBEdXJhdGlvbgoKYGBge3J9CmRhdGEgPC0gZGF0YSAlPiUKICAgIG11dGF0ZShkdXJhdGlvbl9zID0gZHVyYXRpb25fbXMvMTAwMCkKCmRhdGEgJT4lIAogICAgc2VsZWN0KGR1cmF0aW9uX3MpICU+JQogICAgZ2xpbXBzZSgpCmBgYAoKCmBgYHtyfQpkYXRhICU+JSAKICAgIGdncGxvdChhZXMoc2FtcGxlPWR1cmF0aW9uX3MpKSArIAogICAgICAgIHN0YXRfcXEoKQpgYGAKCmBgYHtyfQpoY2hhcnQgKGRhdGEkZHVyYXRpb25fcywKICAgICAgICBjb2xvciA9ICIjQjcxQzFDIiwKICAgICAgICBuYW1lID0gIkR1cmHDp8OjbyAocykiKQpgYGAKCiMjIyBSZW1hc3RlcgoKVWx0aW1vIGFsYnVtICIxOTgxOiBJc3NvIMOpIHF1ZSDDqSBGb3Jyw7MhIgoKYGBge3J9CmRhdGEgPC0gZGF0YSAlPiUKICAgIG11dGF0ZShyZW1hc3RlciA9IGFsYnVtX3JlbGVhc2VfZGF0ZSA+ICIxOTgxLTMwLTEyIikKCmRhdGEgJT4lIAogICAgc2VsZWN0KGFsYnVtX25hbWUsIGFsYnVtX3JlbGVhc2VfeWVhciwgcmVtYXN0ZXIpICU+JQogICAgc2FtcGxlX24oMTApCmBgYAoKCmBgYHtyfQp0ZW1wIDwtIGRhdGEgJT4lIAogICAgZGlzdGluY3QoYWxidW1fbmFtZSwKICAgICAgICAgICAgIC5rZWVwX2FsbCA9IFRSVUUpICU+JQogICAgbXV0YXRlKHJlbWFzdGVyID0gaWZlbHNlKHJlbWFzdGVyID09IFRSVUUsInJlbWFzdGVyaXphZG8iLCJvcmlnaW5hbCIpKQoKaGNoYXJ0KHRlbXAkcmVtYXN0ZXIsCiAgICAgICBjb2xvckJ5UG9pbnQgPSBUUlVFLAogICAgICAgbmFtZT0iw4FsYnVtIikKYGBgCgoKCiMjIE1haXMgZGlzY3Vyc28vZGnDoWxvZ28gKHNwZWVjaGluZXMpIHRlbSBlZmVpdG8gc29icmUgYSBkYW7Dp2FiaWxpZGFkZSBkYSBtw7pzaWNhPwoKYGBge3J9CnAgPC0gZGF0YSAlPiUgCiAgICAgICAgZGlzdGluY3QodHJhY2tfbmFtZSwgLmtlZXBfYWxsID0gVFJVRSkgJT4lCiAgICAgICAgZ2dwbG90KGFl
cyh4PXNwZWVjaGluZXNzLAogICAgICAgICAgICAgICAgICAgeT1kYW5jZWFiaWxpdHkpKSArCiAgICAgICAgZ2VvbV9wb2ludChhbHBoYT0wLjQpIAoKZ2dwbG90bHkocCkKYGBgCgpgYGB7cn0KZGF0YSAlPiUgCiAgICBkaXN0aW5jdCh0cmFja19uYW1lLCAua2VlcF9hbGwgPSBUUlVFKSAlPiUKICAgIGdncGxvdChhZXMoc3BlZWNoaW5lc3MsZGFuY2VhYmlsaXR5KSkgKwogICAgc3RhdF9kZW5zaXR5MmQoYWVzKGZpbGwgPSAuLmxldmVsLi4pLCBnZW9tID0gInBvbHlnb24iKQpgYGAKCmBgYHtyfQpkYXRhICU+JQogICAgZ3JvdXBfYnkodHJhY2tfbmFtZSkgJT4lCiAgICB0b3BfbigxMCwgc3BlZWNoaW5lc3MpCmBgYAoKIyMgQ29tbyBvIHBhc3NhciBkb3MgYW5vcyBhZmV0YSBvIHRlbXBvIGRlIGR1cmHDp8OjbyBkYSBtw7pzaWNhPwoKYGBge3J9CnRlbXAgPC0KICAgIGRhdGEgJT4lIAogICAgZGlzdGluY3QoYWxidW1fbmFtZSwgLmtlZXBfYWxsID0gVFJVRSkgJT4lCiAgICBncm91cF9ieShhbGJ1bV9yZWxlYXNlX3llYXIpICU+JQogICAgc3VtbWFyaXNlKG9yaWdpbmFsX24gPSBzdW0oIXJlbWFzdGVyKSwKICAgICAgICAgICAgICByZW1hc3Rlcl9uID0gc3VtKHJlbWFzdGVyKSkKCmhpZ2hjaGFydCgpICU+JQoKICBoY194QXhpcyhjYXRlZ29yaWVzID0gdGVtcCRhbGJ1bV9yZWxlYXNlX3llYXIpICU+JQogIGhjX2FkZF9zZXJpZXModGVtcCRvcmlnaW5hbF9uLCAKICAgICAgICAgICAgICAgIHR5cGUgPSAiY29sdW1uIiwKICAgICAgICAgICAgICAgIGNvbG9yID0gIiNCNzFDMUMiLAogICAgICAgICAgICAgICAgbmFtZSA9ICJOw6NvIHJlbWFzdGVyaXphZG8iKSAlPiUKICBoY19hZGRfc2VyaWVzKHRlbXAkcmVtYXN0ZXJfbiwgCiAgICAgICAgICAgICAgICB0eXBlID0gImNvbHVtbiIsCiAgICAgICAgICAgICAgICBuYW1lID0gIlJlbWFzdGVyaXphZG8iKSAgJT4lCiAgICAgIGhjX3RpdGxlKHRleHQgPSAiTsO6bWVybyBkZSDDoWxidW5zIHBvciBhbm8iKQoKYGBgCgpgYGB7cn0KZGF0YSAlPiUgCiAgICBnZ3Bsb3QoYWVzKHg9YXMuZmFjdG9yKGFsYnVtX3JlbGVhc2VfeWVhciksCiAgICAgICAgICAgICAgIGR1cmF0aW9uX3MsCiAgICAgICAgICAgICAgIGdyb3VwPWFsYnVtX3JlbGVhc2VfeWVhciwKICAgICAgICAgICAgICAgY29sb3I9cmVtYXN0ZXIpKSArCiAgICBnZW9tX2JveHBsb3QocG9zaXRpb24gPSAiZG9kZ2UiLCBhbHBoYT0wLjYpICsKICAgIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gMzAsIGhqdXN0ID0gMSkpCgpgYGAKCiMjIE9zIGFsYnVucyByZWxhbsOnYWRvcy9yZW1hc3Rlcml6YWRvcyBzw6NvIG1haXMgcG9wdWxhcmVzPyAKCmBgYHtyfQojIGxvbGxpcG9wIGNoYXJ0Cm0gPC0gbGlzdCgKICBsID0gMzcwKQoKcCA8LSBkYXRhICU+JQogICAgICAgIGdncGxvdChhZXMoYWxidW1fcG9wdWxhcml0eSx5PXJlb3JkZXIoYWxidW1fbmFtZSxhbGJ1bV9wb3B1bGFyaXR5KSwKICAgICAg
ICAgICAgICAgICAgIGNvbG9yPXJlbWFzdGVyLAogICAgICAgICAgICAgICAgICAgZ3JvdXA9cmVtYXN0ZXIpKSArCiAgICAgICAgICAgIGdlb21fc2VnbWVudChhZXMoeCA9IDAsIHkgPSByZW9yZGVyKGFsYnVtX25hbWUsYWxidW1fcG9wdWxhcml0eSksIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIHhlbmQgPSBhbGJ1bV9wb3B1bGFyaXR5LCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICB5ZW5kID0gYWxidW1fbmFtZSkpICsgCiAgICAgICAgZ2VvbV9wb2ludCgpICsKICAgICAgICB0aGVtZShheGlzLnRpdGxlLnk9ZWxlbWVudF9ibGFuaygpKQoKZ2dwbG90bHkocCkgJT4lCiAgbGF5b3V0KGF1dG9zaXplID0gRiwKICAgICAgICAgbWFyZ2luID0gbSkKYGBgCgo=